## **Aim of the Script**
######### Transcriptional Factor analysis in neighborhood Zone (TFinZONE) ###############################################
##### This Script aims to analyze the DNA-Motifs associated with the chromatin States areas as scRNA-seq data. Instead of an expression of genes, we propose an enrichment score. The multiplication of the motif score on the Peak and the log10-Pvalue of the motif in the chromatin state of interest.
## Input files
##
## **Input files**
##### 08MOTIFS_on_PEAKS
- Folder containing all the output results from homer motifs of all Peaks files used in the program.
##### 08TSS_Enh_w_04dpci_fHeatmap_ALL.xlsx
- Matrix file, Containing the log10Pvalue from each motifs.
## **Output files**
#### 03OUTPUT
- Folder containing all the plots used in figure 4. As well as additional output from the Script
workdir = "./"
setwd(workdir)
PTHA="../03OUTPUT/"
dir.create(PTHA)
PROJECT="03a_TFHOOD_at_04dpci"
PORT=paste(PTHA,PROJECT,"/",sep="")
PTHA1=paste(PTHA,PROJECT,"/",sep="")
dir.create(PTHA)
dir.create(PORT)
dir.create(PTHA1)
TRY="_"
TRY1="02Ia_"
PVALUE=0.05
#### FOR VOLCANO ###
upcol<- "#B2182B" # magenta from PiyG
nc<- "#000000" # black
downcol<- "grey" # green from PiyG
CC= c(downcol, "#F7F7F7",upcol)
CO3<- c("lightgrey","#B2182B")
CO_ALU3=c("#d7191c","#d8b365","#542788","grey", "#91bfdb", "grey","grey")
NAME33<- c("Mo_Name","Consensus","P-value","Log_P-value","q-value_Benja","Nor_of_Tar","Per_of_Tar","No_of_Tar_Backg","Per_of_Tar_Backg")
NAME34<- c("HOmer_NAME",NAME33)
mat_m<- data.frame("mask_500_FIND")
## Here you write the Path to the
WORK1="../01DATA_ORI/08MOTIFS_on_PEAKS/"
#WORK1="Y:/002ZF/000FIGURES_ANALY/00GITHUB_01HIS_Chroma_Factors/01HIS_Chrom_Factors_TEST/04MOtifs_Analy_as_SC_MOAC_Fig3_5/02Fig4_MO_MOAC_04dpci/01DATA_ORI/08MOTIFS_on_PEAKS/"
NAME_G2 <-data.frame(list.files(path=WORK1,pattern="*_FIND"))
colnames(NAME_G2)<- "ENH4"
#NAME_G2$GROUP <- gsub("CORR1_01d_", "", NAME_G2$ENH4)
NAME_G2$GROUP <- gsub("*_mask_500_FIND", "", NAME_G2$ENH4)
### this is important to check the SYMBOL
NAME_G22<- data.frame(c("00A_Ia","01Ia_A","02U_Ea","03U_Ia","04U_Pa"))
NAME_G2c<- as.character(t(NAME_G2[,2]))
#WORK11A11="../01DATA_ORI/07aMO_forMOAC/08TSS_Enh_w_04dpci_fHeatmap_ALL.xlsx"
WORK11A11="../01DATA_ORI/07aMO_forSeurat_ANALY/08TSS_Enh_w_04dpci_fHeatmap_ALL.xlsx"
MAT_1A= read_excel(WORK11A11)
colnames(MAT_1A)[1]<- "SYMBOL"
dim(MAT_1A)
## [1] 338 11
##### Set resolution for CLustering
RES=0.5
## sequential:
## - args: function (..., envir = parent.frame())
## - tweaked: FALSE
## - call: NULL
## sequential:
## - args: function (..., workers = 16, envir = parent.frame())
## - tweaked: TRUE
## - call: plan("sequential", workers = 16)
##
## 00A_Ia_04dcpi 01Ia_A_04dcpi 02U_Ea_04dcpi 03U_Ia_04dcpi 04U_Pa_04dcpi
## 39886 291670 14945 35034 37650
## [1] 3
b7<- AC_RE_VOL_SC_META
rownames(b7)<- b7$PositionID2
mat_c1<- mat_cm_TMM_BOTH_SC[,rownames(b7)]
rownames(mat_c1)<- mat_cm_TMM_BOTH_SC$SYMBOL
dim(mat_c1)
## [1] 240 4647
dim(b7)
## [1] 4647 5
smartseq2 <- CreateSeuratObject(mat_c1,project = "04d", meta.data = b7)
## png
## 2
arrangeQC_TF <- ggarrange(EC00,EC001, ncol = 2,nrow =1,common.legend = T, align = c("hv"),legend="none")
print(arrangeQC_TF)
grid.arrange( plot1,plot2, ncol = 1,nrow = 2)
all.genes <- rownames(smartseq2)
smartseq2 <- ScaleData(smartseq2, features = all.genes,model.use
="linear")
smartseq2 <- RunPCA(smartseq2, features = VariableFeatures(object = smartseq2))
VizDimLoadings(smartseq2, dims = 1:4, reduction = "pca")
grid.arrange( P1, ncol = 1,nrow = 1)
print(P2)
## png
## 2
#grid.arrange( P3, ncol = 1,nrow = 1)
set.seed(1)
smartseq2 <- FindNeighbors(smartseq2, dims = 1:10)
smartseq2 <- FindClusters(smartseq2, resolution = RES, random.seed= 1, algorithm=1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4647
## Number of edges: 146524
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8811
## Number of communities: 12
## Elapsed time: 0 seconds
#########here
## png
## 2
## png
## 2
print(arrange1t)
print(arrange1)
print(arrange1bt)
print(arrange1c)
print(arrange1cat)
## png
## 2
print(arrange3)
print(arrange3t)
print(arrange2)
grid.arrange( P20,P201, ncol = 1,nrow = 2)
grid.arrange( P05ah_bino1, ncol = 1,nrow = 1)
grid.arrange( P05ah_bino, ncol = 1,nrow = 1)
DATA_SC<- data.frame(smartseq2@assays[["RNA"]]@counts@i)
DATA_SC<- data.frame(smartseq2@assays[["RNA"]]@counts@i)
DATA_SC<- GetAssay(smartseq2,assay = "RNA")
DATA_SC1<- data.frame(DATA_SC@scale.data)
DATA_SC2<- data.frame(DATA_SC@scale.data)
#dim(DATA_SC1)
META_SEU<- data.frame(smartseq2@meta.data)
colnames(DATA_SC1)<- META_SEU$PositionID
colnames(META_SEU)[4]<- "SYMBOL_TF"
GENES001<- c("CEBP","Cdx2", "Nanog","Smad2","Smad4","Sox9","Sox10","Mef2c","Gata4")
GENES002 <- c("Eomes","FOXA1", "FOXM1","CLOCK","MITF", "MafA", "Mef2b")
GENES003 <- c("Eomes","Oct4","Foxh1","CUX1","Bapx1", "Elk4","Foxo1","Fli1")
GENES004<- c("Rbp7","Rgcc", "Fabp4","Egfl7","Flt1", "Cd36","Nrp1","Tpm1", "Rbp1")
GENES005<- c("Mfap5","Dcn","Mgp","Cfh","Serpine2","Mt1","Gpx3","Rbp1","Rcn3")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
## png
## 2
## png
## 2
print(arrange12)
## png
## 2
## png
## 2
print(P05a)
print(P05b)
print(P05c)
print(P05a1)
print(P05b1)
print(P05c1)
## png
## 2
## png
## 2
print(arrange1CM41)
print(arrange1CM42)
print(arrange1CM43)